home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
PC Media 22
/
PC MEDIA CD22.iso
/
share
/
prog
/
datalib2
/
index.cpp
< prev
next >
Wrap
C/C++ Source or Header
|
1995-08-14
|
17KB
|
540 lines
#include "datapriv.hpp"
/*
This file contains the routines concerned with the generation and use
of index files
*/
/***************************************************************************
Verify Index
The following routine verifys an index by opening a record on that index
and checking that all records are present and in order
***************************************************************************/
long clusnrec; // Total number of records found in clusters
int database::verifyindex(char *ind,int depth,void (*progress)(int,long),int order)
{
if (!getindkey(ind)) return INVIND; // Must be valid attached index
record rec(*this,ind);
char res[128],resn[128];
int rtype;
char iexp[128];
strcpy(iexp,getindkey(ind));
rec.select(FIRST,ALL);
if (!depth) return 0;
// Write all changed clusters to the index file
indclus *ip=rec.curind->icp->oi->topind;
while(ip)
{
if (ip->change && ip->valid)
{
ip->change=0;
Fseek(ip->fp,ip->clusn*512L,SEEK_SET); Fwrite(ip->clusdat,1,512,ip->fp);
}
ip=ip->next;
}
if (progress) (*progress)(1,0);
clusnrec=0;
if (!rec.curind->icp->oi->topind->verclus(progress,order)) return ERRINTREE;
if (clusnrec!=getnrec()) return INCOMP;
if (depth<2) return 0;
rec.eval(iexp,res,rtype);
for(long i=1; i<getnrec(); i++) // Depth=2, check out record order
{
if (progress)
{
if (i==1) (*progress)(2,i);
if (!(i%order)) (*progress)(2,i);
}
if (rec.select(NEXT,ALL)) return INCOMP;
rec.eval(resn,rtype);
double comp;
switch(rtype)
{
case OPINT : comp=*(double *)resn-*(double *)res; break;
case OPDATE: comp=atol(resn)-atol(res);
case OPSTR : comp=strcmp(resn,res);
}
if (comp<0) return OORD;
memcpy(res,resn,100);
}
if (depth<3) return 0;
// Depth 3, check all records present in index
record rp(*this); rp.select(FIRST);
record frp(*this,ind);
for(i=1; i<=getnrec(); i++)
{
if (progress)
{
if (i==1) (*progress)(3,i);
if (!(i%order)) (*progress)(3,i);
}
rp.eval(getindkey(ind),res,rtype);
if (frp.selkey(res,READIND,rtype)) return RECNOTFND;
while(rp.getrecnum()!=frp.getrecnum()) frp.selkey();
rp.select(NEXT,ALL);
}
return 0;
}
// Verify a cluster tree, return last key if OK, return 0 on error
// If record cluster then add number of records to clusnrec
char *indclus::verclus(void (*progress)(int,long),int order)
{
long nrec; // Points to each record in index
char *lrec,*nkey;
indclus *ni;
int i,created;
if (clustyp) // Block cluster
{
for(i=0; i<nclusrec; i++) // Check each cluster in the block
{
int err=0;
nrec=*(long *)(clusdat+4+i*reclen); // Next block down
nkey=clusdat+12+i*reclen; // Last key of next block down
ni=new indclus(nrec,0,oi); // Next index down
lrec=ni->verclus(progress,order);
if (!lrec) err=1; // Error in next block down
else
switch(oi->restype) // Check last key in child matches this
{
case OPSTR : if (strncmp(nkey,lrec,explen)) err=1; break;
case OPINT : if (*(double *)nkey-*(double *)lrec) err=1; break;
case OPDATE : if (strncmp(nkey,lrec,8)) err=1; break;
}
delete ni;
if (err) return 0;
}
}
else // Record cluster
{
long oclus=clusnrec;
clusnrec+=nclusrec;
if (progress && (oclus/order!=clusnrec/order)) (*progress)(1,clusnrec);
}
char *rv=(clusdat+12+reclen*(nclusrec-1));
return rv;
}
/***************************************************************************
BUILD INDEX
The following routines and structures are concerned with building an
index from scratch. A flat file of unsorted record clusters is built,
these are then sorted, and then the tree is built at the end of the file
finally the header record is written
***************************************************************************/
// The following holds a disk buffer, there are DBUF of these.
#define DBUF 5 // Number of disk clusters buffered in indexes
struct _dbuf
{
char buf[512]; // Holds the data from disk
char uflag; // Counter increments once for every use
long dpos; // Position in disk file
int cval; // contents have changed if cval==1
};
_dbuf *_dbp; // Points to disk buffer array
/************** Build Index ... This routine builds an index ************/
// Returns 0 on success, error otherwise
int database::buildindex(char *expr,char *fname,void (*progress)(int,long),int order)
{
record rec(*this,NOIND); // record
int indlen,indtype; // Index length and type
int tindlen; // total index length with pad
char ws[128],*wsp; // Work space, generic pointer
FILE *ifp; // Index file pointer
int ipad; // No. of characters to pad out index result
int nclusrec; // No. of records/cluster
char buf[512]; // Useful buffer
long i,j,k;
int dum;
long topclus=1; // Cluster at top of tree
int rv=0;
if (nrec) rv=rec.select(FIRST,ALL);
if (!(indlen=rec.indchk(expr,indtype)) || rv) return EXPRERR;
ipad=(4-indlen%4)%4;
tindlen=indlen+ipad;
nclusrec=504/(indlen+ipad+8);
strcpy(ws,fname);
if (wsp=strchr(ws,'.')) *wsp=0;
strcat(ws,".NDX");
if (!(ifp=fopen(ws,"w+b"))) return NOINDEX;
Fwrite(buf,512,1,ifp); // Write a blank header (fill it in later)
rec.eval(expr,ws,dum); // Tokenise expression
if (nrec)
{
long nwrit=0;
while(!rv) // Write out all records & index expressions
{
long nwr=0; // No. of records written
int dum;
char *ip=buf+4; // Pointer to next free space in buffer
for(i=0; i<nclusrec; i++)
{
if (rv) break;
if (!rec.eval(ws,dum)) {fclose(ifp); return EXPRERR;}
*(long *)ip=0; ip+=4; *(long *)ip=rec.getrecnum(); ip+=4; // rec. no.
switch(indtype) // Result
{
case OPINT : *(double *)ip=*(double *)ws; ip+=sizeof(double); break;
case OPSTR : wsp=ws; while(*wsp++); wsp--; k=wsp-ws;
for(j=0; j<tindlen-k; j++) *wsp++=' ';
strncpy(ip,ws,tindlen); ip+=tindlen;
break;
case OPDATE : strncpy(ip,ws,8); ip+=tindlen; break;
}
nwr++; nwrit++;
if (progress && (nwrit==1 || !(nwrit%order))) (*progress)(1,nwrit);
rv=rec.select(NEXT,ALL);
}
*(double *)ip=0;
*(long *)buf=(long)nwr;
Fwrite(buf,512,1,ifp);
}
_dbp=new _dbuf[DBUF];
indsort(ifp,indtype,tindlen+8,indlen,nrec,nclusrec,progress,order);
delete _dbp;
if (progress) (*progress)(3,1);
buildtree(ifp,tindlen+8,indlen,nclusrec,nrec/nclusrec+1,topclus);
}
Fseek(ifp,0L,SEEK_SET); // Now write header
memset(buf,0,512);
*(long *)buf=topclus; *(long *)(buf+4)=topclus+1;
switch(indtype)
{
case OPINT : buf[9]='N'; buf[16]=1; break;
case OPDATE: buf[9]='D'; buf[16]=0; break;
case OPSTR : buf[9]='C'; break;
}
buf[12]=indlen;
buf[14]=nclusrec;
buf[18]=indlen+ipad+8;
strcpy(buf+24,expr);
Fwrite(buf,512,1,ifp);
if (!nrec) {memset(buf,0,512); Fwrite(buf,512,1,ifp);}
fclose(ifp);
return 0;
}
/********** Index sort ******************************************************/
// These functions implement a full index sort on the unsorted index
// file.
// The following structure is a buffer which holds a cluster from
// the index file, whenever 2 elements are compared or sorted, the
// buffers are checked first to see if the elements are contained therein
struct _ibf
{
char *buf; // Points to character data
_dbuf *dbuf; // Points to disk buffer associated with index
long low; // Lowest element number in buffer
long up; // Highest element number in buffer
long n; // Element number in buffer to examine
char *an; // Address of element number in buffer to examine
char *tn; // Points to element data
long dpos; // Position in disk file
FILE *fp; // File pointer to index file
int nclusrec; // No. of records/cluster
int indlen; // index length
int indexplen; // index expression length
int indtype; // Index type
_ibf(FILE *,int,int,int,int);
// Constructor initialises buffer
~_ibf(); // Destructor flushes buffer to disk
void setn(long n); // Set element no. of buffer to n
// This will check if n is in the buffer
// and if it is not will save the buffer
// and update the buffer with new value
void indswap(struct _ibf &s); // Swap element with s
int indcomp(struct _ibf &s); // Compare element with s
};
_ibf::_ibf(FILE *ifp,int inclusrec,int iindlen,int iindexplen,
int iindtype)
{
fp=ifp;
nclusrec=inclusrec;
indlen=iindlen;
indexplen=iindexplen;
indtype=iindtype;
up=-1; // Flag dbuf is invalid
dbuf=0;
}
_ibf::~_ibf()
{
dbuf->uflag--;
if (!(dbuf->uflag) && dbuf->cval)
{
Fseek(fp,512L*dpos,SEEK_SET); Fwrite(buf,512,1,fp);
}
}
// Set element number in buffer
// If we are simply changing to another element in the same buffer then
// just move the pointers. If we are moving buffers then check if the
// current buffer should be written back to disk, see if anyone else
// is using the new buffer & if so grab it, if not then set up a new
// buffer holding the cluster we require from disk.
void _ibf::setn(long nw)
{
n=nw;
if (nw<low || nw>up) // Update buffer ?
{
if (dbuf) // Only if dbuf is valid
{
dbuf->uflag--;
if (dbuf->cval && !dbuf->uflag)
{
Fseek(fp,512L*dpos,SEEK_SET); Fwrite(buf,512,1,fp);
}
}
dpos=n/nclusrec+1;
int fflag=0; // Flags buffer already exists
_dbuf *nfree; // Points to a free buffer
for(int i=0; i<DBUF; i++) // Now do we already have this buffer ?
{
if (_dbp[i].dpos==dpos) {fflag=1; break;}
if (!(_dbp[i].uflag)) nfree=&(_dbp[i]);
}
if (fflag) // Someone else has grabbed this buffer for us
{
dbuf=&(_dbp[i]); buf=dbuf->buf; dbuf->uflag++;
}
else // We'll grab a new buffer
{
nfree->uflag=1; nfree->dpos=dpos; nfree->cval=0; buf=nfree->buf;
Fseek(fp,512L*dpos,SEEK_SET); Fread(buf,512,1,fp);
dbuf=nfree;
}
low=(dpos-1)*nclusrec;
up=dpos*nclusrec-1;
}
an=buf+4+(n-low)*indlen; tn=an+8;
}
// This function swaps the current element with the current element of s
void _ibf::indswap(_ibf &s)
{
char temp[110];
memcpy(temp,an,indlen);
memcpy(an,s.an,indlen);
memcpy(s.an,temp,indlen);
dbuf->cval=1; s.dbuf->cval=1; // Show contents have changed
}
// This function compares the current element with the current element of s
int _ibf::indcomp(_ibf &s)
{
// if (indtype==OPSTR) return(strncmp(tn,s.tn,indexplen));
if (indtype!=OPINT) return(strncmp(tn,s.tn,indexplen));
double dr=(*(double *)(tn))-(*(double *)(s.tn));
if (dr>0) return 1;
if (dr<0) return -1;
return 0;
}
// The following function takes a pointer to an index file, and
// sorts the elements in it, it is a quicksort derived from the Zortech
// qsort function itself derived from the Ray Gardner public domain function
// indtype is the type of result, indlen the length of index+pad+pointers,
// indexplen is the length of index expression alone, nel is the no. of
// elements, and nclusrec the number of records/cluster
void database::indsort(FILE *ifp,int indtype,int indlen,int indexplen,
long nel,int nclusrec,void (*progress)(int,long),int order)
{
long nswap=0;
long lastorder=-1;
long stack[40],*sp; // stack and stack pointer
_ibf i(ifp,nclusrec,indlen,indexplen,indtype); // scan and limit pointers
_ibf j(ifp,nclusrec,indlen,indexplen,indtype);
_ibf limit(ifp,nclusrec,indlen,indexplen,indtype);
_ibf base(ifp,nclusrec,indlen,indexplen,indtype);
_ibf temp(ifp,nclusrec,indlen,indexplen,indtype);
for(int x=0; x<DBUF; x++) {_dbp[x].dpos=-1; _dbp[x].uflag=0;} // Init buffers
sp=stack; // Init stack pointer
base.setn(0);
limit.setn(nel); /* pointer past end of array */
while (1) /* repeat until done then return */
{
if (nswap>lastorder && progress)
{(*progress)(2,nswap); lastorder=nswap+order;}
while (limit.n-base.n>5) // Max span=5, otherwise insertion sort
{
temp.setn(((unsigned long)(limit.n-base.n)>>1)+base.n); // swap middle, base
temp.indswap(base); nswap++;
i.setn(base.n+1); /* i scans from left to right */
j.setn(limit.n-1); /* j scans from right to left */
if (i.indcomp(j)>0) {i.indswap(j); nswap++;} // Sedgewick's
// three-element sort
if (base.indcomp(j)>0) {base.indswap(j); nswap++;} // sets things up
// so that
if (i.indcomp(base)>0) {i.indswap(base); nswap++;} // i <= base <= j
// base is the pivot element
while (1)
{
do i.setn(i.n+1); /* move i right until i >= pivot */
while (i.indcomp(base)<0);
do j.setn(j.n-1); /* move j left until j <= pivot */
while (j.indcomp(base)>0);
if (i.n>j.n) break; /* break loop if pointers crossed */
i.indswap(j); /* else swap elements, keep scanning */
nswap++;
}
nswap++;
base.indswap(j); /* move pivot into correct place */
if (j.n-base.n>limit.n-i.n) /* if left subfile is larger... */
{
sp[0]=base.n; sp[1]=j.n; /* stack left subfile base */
base.setn(i.n); /* and limit */
} /* sort the right subfile */
else /* else right subfile is larger */
{
sp[0]=i.n; sp[1]=limit.n; /* stack right subfile base */
limit.setn(j.n); /* and limit */
} /* sort the left subfile */
sp+=2; /* increment stack pointer */
}
// Insertion sort on remaining subfile
i.setn(base.n+1);
while (i.n<limit.n)
{
j.setn(i.n);
temp.setn(j.n-1);
while (j.n>base.n && temp.indcomp(j)>0)
{
temp.indswap(j); nswap++;
j.setn(j.n-1);
temp.setn(j.n-1);
}
i.setn(i.n+1);
}
if (sp>stack) /* if any entries on stack... */
{
sp-=2; /* pop the base and limit */
base.setn(sp[0]);
limit.setn(sp[1]);
}
else break; /* else stack empty, all done */
}
}
// This routine builds an index tree in a sorted flat index file
// indlen is length of index + pointers
// nrec is no. of records
// nclusrec is no. of records/cluster
// nclus is no. of clusters at the level of tree being indexed
// indexplen is the index expression length
// topclus is returned as the top cluster in the tree
void database::buildtree(FILE *fp,int indlen,int indexplen,
int nclusrec,long nclus,long &topclus)
{
long fclus=1; // First cluster of this level of tree
long flclus; // First cluster of level being written
char buf[512],ibuf[512]; // Holds current cluster & cluster from disk
int recclus=1; // Flags we are looking at a record cluster
if (nclus==1) flclus=1;
while(nclus>1)
{
long tlclus=0; // Totals clusters written at this level
long nclev=nclus/nclusrec; // No. of clusters to write at this level
Fseek(fp,0L,SEEK_END);
flclus=ftell(fp)/512L; // First cluster at this level
for(long i=0; i<=nclev; i++) // For each cluster at this level
{
Fseek(fp,fclus*512L,SEEK_SET); // Find first cluster to be examined
char *cop=buf+4; // Location to copy record to
int nrecclus=0; // No. of records in cluster
for(int j=0; j<nclusrec; j++)
{
char *lr; // points to last record in cluster
if (!(nclus--)) break; // Last cluster yet ?
Fread(ibuf,512,1,fp); // Cluster to be examined
lr=ibuf+4+(*(long *)ibuf-recclus)*indlen; // Last record in cluster
*(long *)cop=fclus++; cop+=sizeof(long); // Record number
*(long *)cop=0; cop+=sizeof(long);
memcpy(cop,lr+8,indexplen); // Copy in last record
cop+=indlen-8;
nrecclus++;
}
tlclus++;
*(long *)buf=nrecclus-1; // No. of records in cluster
Fseek(fp,0L,SEEK_END); Fwrite(buf,512,1,fp); // move to file end & write
}
fclus=flclus; nclus=tlclus; // Now index level just done
if (recclus) recclus=0; // Now looking at blocks
}
topclus=flclus; // Top cluster in tree
}